library(knitr)
library(tidyverse)
library(haven)
library(RColorBrewer)
library(lubridate)
library(here)
library(zipcode)
library(foreign)

fips_kansas_city <- c("Cass" = 29037, "Clay" = 29047, "Jackson" = 29095, "Platte" = 29165)
fips_joplin <- c("Jasper" = 29097, "Newton" = 29145)
fips_nyc <- c("Bronx" = 36005, 
              "Brooklyn/Kings" = 36047, 
              "Manhattan/New York" = 36061, 
              "Queens" = 36081, 
              "Staten Island/Richmond" = 36085)


#####
## Import data and do some basic cleaning of it.
######
## Warshaw-Tausanovitch County Ideology data. I use these to get the state codes for each county. This data also includes
## county ideolgoy and presidential vote that we might decide to use later.
county_TW_ideology_estimates<-read.csv(file="county_TW_ideology_estimates.csv")
county_TW_ideology_estimates<-select(county_TW_ideology_estimates, county_fips,county_name, state,abb,mrp_ideology_mean,obama_pc_t2012)
county_TW_ideology_estimates$county_fips<-as.numeric(as.vector(county_TW_ideology_estimates$county_fips))

## Change fips for NYC, Kansas City, and Joplin to the appropriate new value...
county_TW_ideology_estimates$county_fips[county_TW_ideology_estimates$county_fips %in% fips_nyc] <- "36005"
county_TW_ideology_estimates$county_fips[county_TW_ideology_estimates$county_fips %in% fips_kansas_city] <- "Kansas City (MO)"
county_TW_ideology_estimates$county_fips[county_TW_ideology_estimates$county_fips %in% fips_joplin] <- "Joplin (MO)"

states<-group_by(county_TW_ideology_estimates, state, abb)%>%summarise(n=n())
states<-subset(states, state!=".")
states<-select(states,-n)


### Import a file with a list of the dates we'll include in our analysis. We'll use this to expand out the COVID
##  data to generate zeros for days with no COVID deaths/cases
dates<-read.csv(file="dates.csv")
dates<-as.data.frame(dates)
colnames(dates)<-"date"
dates$date<-as.character(dates$date)


### pull number of cases from NYTimes Github
#nyt_county_data_raw <-
#  read_csv(
 #   'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'
 # ) 
#nyt_county_data_raw$fips<-as.numeric(as.vector(nyt_county_data_raw$fips))
## make backup of NYT data
#write.csv(nyt_county_data_raw, file="data/raw/nyt_county_data_raw.csv")

## For replication purposes, let's use a copy of the data we saved.
nyt_county_data_raw<-read_csv("nyt-covid-us-counties.csv")
nyt_county_data_raw$fips<-as.numeric(as.vector(nyt_county_data_raw$fips))

## Make NYC fips code to the Bronx fip code so it merges properly to survey data 
nyt_county_data_raw$fips[nyt_county_data_raw$county=="New York City"]<-"36005"

# Recode "Kansas City" and Cass, Clay, Jackson, Platte counties to 
# Recode Cass, Clay, Jackson, Platte counties to "Kansas City". Previously,
# Kansas City recoded to Jackson County (29095). This means future data.frames
# will need these changes before merging with NYT data (e.g. Nationscape survey data).
nyt_county_data_raw$fips[nyt_county_data_raw$fips %in% fips_kansas_city] <- "Kansas City (MO)"
nyt_county_data_raw$fips[nyt_county_data_raw$county == "Kansas City"] <- "Kansas City (MO)"
nyt_county_data_raw$fips[nyt_county_data_raw$fips %in% fips_joplin] <- "Joplin (MO)"
nyt_county_data_raw$fips[nyt_county_data_raw$county == "Joplin"] <- "Joplin (MO)"

# pull county demographics and join to cases by FIPS
#tr_fip_pop_raw <- read_csv('https://raw.githubusercontent.com/tylerreny/county_demos/master/county_pop_1418.csv')
tr_fip_pop_raw <- read_csv('county_pop_1418.csv')
tr_fip_pop_raw$fips<-as.numeric(as.vector(tr_fip_pop_raw$fips))
tr_fip_pop_raw<-merge( tr_fip_pop_raw,county_TW_ideology_estimates, by.x="fips", by.y="county_fips")

state_pop<-group_by(tr_fip_pop_raw, state)%>%
	summarise(			total_pop=sum(total_pop, na.rm=T)
		)
dim(state_pop)

## Make a matrix with all dates since beginning of 2020. 
## Make COVID cases and deaths zeros if they're missing
nyt_county_data_raw$cases[is.na(nyt_county_data_raw$cases)]<-0
nyt_county_data_raw$deaths[is.na(nyt_county_data_raw$deaths)]<-0
nyt_county_data_raw<-select(nyt_county_data_raw, -county)
nyt_county_data_raw$month<-as.numeric(as.vector(substr(nyt_county_data_raw$date, 6,7)))
nyt_county_data_raw<-filter(nyt_county_data_raw, month<6)

########
#### Create data for state level Analysis
########
state_analysis<-group_by(nyt_county_data_raw, state,date)%>%
	summarise(	
#		total_pop=sum(total_pop, na.rm=T),
		cases_cumulative=sum(cases, na.rm=T),
		deaths_cumulative=sum(deaths, na.rm=T)
#		state_cases_per_100000=cases_cumulative/total_pop*100000,
#		state_deaths_per_100000=deaths_cumulative/total_pop*100000
		)
dim(state_analysis)
state_analysis<-filter(state_analysis, state!=".")
state_analysis<-filter(state_analysis, date=="2020-05-31")
state_analysis<-merge(state_analysis, state_pop, by="state")

state_analysis<-mutate(state_analysis,
		state_cases_per_100000=cases_cumulative/total_pop*100000,
		state_deaths_per_100000=deaths_cumulative/total_pop*100000
		)
state_analysis<-merge(state_analysis, states, by="state")


##import the survey data
survey<-read.dta( "nationscape_covid_analysis.dta")

survey$state_weight[is.na(survey$state_weight)]<-survey$weight[is.na(survey$state_weight)]

## create a variable for Biden vs. Trump voting intention
survey$vote_pres<-NA
survey$vote_pres[survey$trump_biden==1]<-0
survey$vote_pres[survey$trump_biden==2]<-1

survey$vote_house<-NA
survey$vote_house[survey$house_intent==1]<-0
survey$vote_house[survey$house_intent==2]<-1

survey$vote_senate<-NA
survey$vote_senate[survey$senate_intent==1]<-0
survey$vote_senate[survey$senate_intent==2]<-1
survey$vote_senate[
survey$state=="CA" | 
survey$state=="CT" |
survey$state=="DC" |
survey$state=="FL" |
survey$state=="HI" |
survey$state=="IN" | 
survey$state=="MD" |
survey$state=="MO" | 
survey$state=="ND" | 
survey$state=="NV" | 
survey$state=="NY" |
survey$state=="OH" |
survey$state=="PA" |
survey$state=="UT" | 
survey$state=="VT" |
survey$state=="WA" | 
survey$state=="WI" ]<-NA

## Import data on regions
regions<-read.csv("bea_regions.csv")
regions<-select(regions, abb, bea_region)


### Create simplified presidential approval variables
survey$pres_approval2<-NA
survey$pres_approval2[survey$pres_approval==1| survey$pres_approval==2]<-1
survey$pres_approval2[survey$pres_approval==3| survey$pres_approval==4]<-0

## create variables with the month and year of the survey interview
survey$month<-as.numeric(as.vector(substr(survey$date, 6,7)))
survey$year<-as.numeric(as.vector(substr(survey$date, 1,4)))
survey$day<-as.numeric(as.vector(substr(survey$date, 9,10)))

survey$period<-NA
survey$period[(survey$year==2020 & survey$month<3) ] <-0
survey$period[(survey$year==2020 & survey$month>5 & survey$month<7) | (survey$year==2020 & survey$month>5 & survey$month<8 & survey$day<3)]<-1
library(survey)

survey$education5<-NA
survey$education5[survey$education<4]<-1
survey$education5[survey$education==4]<-2
survey$education5[survey$education>4& survey$education<8]<-3
survey$education5[survey$education==8]<-4
survey$education5[survey$education>8]<-5



srs_design_survey <- svydesign(ids = ~1, weights=survey$state_weight, data = survey)

######
### Graph of Trump approval
######
approval<-svyby(~pres_approval2, ~state+period, srs_design_survey, svymean, na.rm=T)
approval<-select(approval, -se)
approval<-pivot_wider(approval, values_from=pres_approval2, names_from=period)
approval$delta<-approval$`1`-approval$`0`

approval<-merge(approval, state_analysis, by.x="state", by.y="abb")
summary(lm(delta~(state_deaths_per_100000), data=approval))
summary(lm(delta~log(state_deaths_per_100000), data=approval))
summary(lm(delta~log(deaths_cumulative), data=approval))
summary(lm(delta~log(state_deaths_per_100000), data=approval, weight=total_pop))

approval_national<-svyby(~pres_approval2, ~period, srs_design_survey, svymean, na.rm=T)
approval_national<-select(approval_national, -se)
approval_national<-pivot_wider(approval_national, values_from=pres_approval2, names_from=period)
approval_national$delta<-approval_national$`1`-approval_national$`0`


library(ggplot2)
c <- ggplot(data=subset(approval), aes(log(state_deaths_per_100000),delta, label=state))
c <- c + geom_text(size=2)
c <- c + stat_smooth(method = "lm")
c <- c + ggtitle("Association between COVID and Trump Approval")
c <- c + ylab("Change in Trump Approval 
from January/February to June")
c <- c + xlab("Log(COVID Deaths per 100,000)") 
 c <- c + theme_bw() +theme(axis.title=element_text(size=8),axis.text=element_text(size=8))+
  theme(plot.title = element_text(hjust = 0.5, size=7)) +
  scale_y_continuous( breaks=c(-.2,-.15,-.1,-.05,0,.05,.1,.15,.2), labels=c("-20%","-15%","-10%","-5%","0%","5%","10%","15%","20%"))#+
  	#geom_hline(yintercept=approval_national$delta,linetype="dotted")

trump_approval<-c

######
### Graph of Trump vs. Biden vote
######
approval<-svyby(~vote_pres, ~state+period, srs_design_survey, svymean, na.rm=T)
approval<-select(approval, -se)
approval<-pivot_wider(approval, values_from=vote_pres, names_from=period)
approval$delta<-approval$`1`-approval$`0`

approval<-merge(approval, state_analysis, by.x="state", by.y="abb")
summary(lm(delta~(state_deaths_per_100000), data=approval))
summary(lm(delta~log(state_deaths_per_100000), data=approval))
summary(lm(delta~log(deaths_cumulative), data=approval))
#summary(lm(delta~log(state_deaths_per_100000), data=approval, weight=total_pop))

approval_national<-svyby(~vote_pres, ~period, srs_design_survey, svymean, na.rm=T)
approval_national<-select(approval_national, -se)
approval_national<-pivot_wider(approval_national, values_from=vote_pres, names_from=period)
approval_national$delta<-approval_national$`1`-approval_national$`0`


library(ggplot2)
c <- ggplot(data=subset(approval), aes(log(state_deaths_per_100000),delta, label=state))
c <- c + geom_text(size=2)
c <- c + stat_smooth(method = "lm")
c <- c + ggtitle("Association between COVID and Trump vs. Biden Vote Intent")
c <- c + ylab("Change in Trump Vote Intent 
from January/February to June")
c <- c + xlab("Log(COVID Deaths per 100,000)") 
 c <- c + theme_bw() +theme(axis.title=element_text(size=8),axis.text=element_text(size=8))+
  theme(plot.title = element_text(hjust = 0.5, size=7))+
  scale_y_continuous( breaks=c(-.2,-.15,-.1,-.05,0,.05,.1,.15,.2), labels=c("-20%","-15%","-10%","-5%","0%","5%","10%","15%","20%"))#+
  	#geom_hline(yintercept=approval_national$delta,linetype="dotted")

trumpvote<-c


######
### Graph of House vote
######
approval<-svyby(~vote_house, ~state+period, srs_design_survey, svymean, na.rm=T)
approval<-select(approval, -se)
approval<-pivot_wider(approval, values_from=vote_house, names_from=period)
approval$delta<-approval$`1`-approval$`0`

approval<-merge(approval, state_analysis, by.x="state", by.y="abb")
summary(lm(delta~(state_deaths_per_100000), data=approval))
summary(lm(delta~log(state_deaths_per_100000), data=approval))
summary(lm(delta~log(deaths_cumulative), data=approval))
#summary(lm(delta~log(state_deaths_per_100000), data=approval, weight=total_pop))

approval_national<-svyby(~vote_house, ~period, srs_design_survey, svymean, na.rm=T)
approval_national<-select(approval_national, -se)
approval_national<-pivot_wider(approval_national, values_from=vote_house, names_from=period)
approval_national$delta<-approval_national$`1`-approval_national$`0`


library(ggplot2)
c <- ggplot(data=subset(approval), aes(log(state_deaths_per_100000),delta, label=state))
c <- c + geom_text(size=2)
c <- c + stat_smooth(method = "lm")
c <- c + ggtitle("Association between COVID and House Vote Intent")
c <- c + ylab("Change in Republican Vote Intent 
from January/February to June")
c <- c + xlab("Log(COVID Deaths per 100,000)") 
 c <- c + theme_bw() +theme(axis.title=element_text(size=8),axis.text=element_text(size=8))+
  theme(plot.title = element_text(hjust = 0.5, size=7))+
  scale_y_continuous( breaks=c(-.2,-.15,-.1,-.05,0,.05,.1,.15,.2), labels=c("-20%","-15%","-10%","-5%","0%","5%","10%","15%","20%"))#+
  #	geom_hline(yintercept=approval_national$delta,linetype="dotted")


house<-c

######
### Graph of Senate vote
######
approval<-svyby(~vote_senate, ~state+period, srs_design_survey, svymean, na.rm=T)
approval<-select(approval, -se)
approval<-pivot_wider(approval, values_from=vote_senate, names_from=period)
approval$delta<-approval$`1`-approval$`0`

approval$delta[
approval$state=="CA" | 
approval$state=="CT" |
approval$state=="DC" |
approval$state=="FL" |
approval$state=="HI" |
approval$state=="IN" | 
approval$state=="MD" |
approval$state=="MO" | 
approval$state=="ND" | 
approval$state=="NV" | 
approval$state=="NY" |
approval$state=="OH" |
approval$state=="PA" |
approval$state=="UT" | 
approval$state=="VT" |
approval$state=="WA" | 
approval$state=="WI" ]<-NA

approval<-merge(approval, state_analysis, by.x="state", by.y="abb")
summary(lm(delta~(state_deaths_per_100000), data=approval))
summary(lm(delta~log(state_deaths_per_100000), data=approval))
summary(lm(delta~log(deaths_cumulative), data=approval))
#summary(lm(delta~log(state_deaths_per_100000), data=approval, weight=total_pop))

approval_national<-svyby(~vote_senate, ~period, srs_design_survey, svymean, na.rm=T)
approval_national<-select(approval_national, -se)
approval_national<-pivot_wider(approval_national, values_from=vote_senate, names_from=period)
approval_national$delta<-approval_national$`1`-approval_national$`0`

library(ggplot2)
c <- ggplot(data=subset(approval), aes(log(state_deaths_per_100000),delta, label=state))
c <- c + geom_text(size=2)
c <- c + stat_smooth(method = "lm")
c <- c + ggtitle("Association between COVID and Senate Vote Intent")
c <- c + ylab("Change in Republican Vote Intent 
from January/February to June")
c <- c + xlab("Log(COVID Deaths per 100,000)") 
 c <- c + theme_bw() +theme(axis.title=element_text(size=8),axis.text=element_text(size=8))+
  theme(plot.title = element_text(hjust = 0.5, size=7))+
  scale_y_continuous( breaks=c(-.2,-.15,-.1,-.05,0,.05,.1,.15,.2), labels=c("-20%","-15%","-10%","-5%","0%","5%","10%","15%","20%")) #+
  #	geom_hline(yintercept=approval_national$delta,linetype="dotted")


senate<-c



multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)

  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)

  numPlots = length(plots)

  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                    ncol = cols, nrow = ceiling(numPlots/cols))
  }

 if (numPlots==1) {
    print(plots[[1]])

  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))

    # Make each plot, in the correct location
    for (i in 1:numPlots) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))

      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}

pdf(file="Figures/figure1_deltas.pdf", height=4)
multiplot(trump_approval, trumpvote, senate, house, cols=2)
dev.off()
